Introduction

The purpose of this report is to document State of the Ecosystem (SOE) indicator data processing. All R code used to process and visualize the following data sets is self-contained in the Rmarkdown document associated with this HTML file. To run and update data sets in this document, set the save_clean parameter in the set-up chunk to TRUE. Raw data for these indicators are available in the file directory soe/inst/extdata.

Data sets

Surface winds

These data are sourced from the NCEP North American Regional Reanalysis (NARR), extending from January 1979 to September 2018.

Variables in “NCEP NARR surface wind; TKE; HLCY, monthly, 1979-2018, V1.csv”
Variable Name Units
Wind speed uwnd m sec-1
Wind direction vwnd °
Turbulent kinetic energy tke J kg-1
Storm relative helicity hlcy m2sec-2

Variables included in these data are surface wind speed and direction (uwnd and vwnd respectively), surface turbulent kinetic energy (TKE), and storm relative helicity (HLCY). An indicator for total wind speed is calculated below as \[\textrm{TWS} = \sqrt{u^2 + v^2}\]. Data are visualized seasonally (Fall = October, November, December; Winter = January, February, March; Spring = April, May, June; Summer = July, August, September).

Filename: NCEP NARR surface wind; TKE; HLCY, monthly, 1979-2018, V1.csv
Contributor: Vincent Saba, ()

Processing

# Read in raw data
d <- read.csv(file.path(raw.dir,"NCEP NARR surface wind; TKE; HLCY, monthly, 1979-2018, V1.csv"))

# Processing steps for all wind speed data
wind_clean1 <- d  %>% gather(., Var, Value, GB.uwnd:MAB.tke) %>% #convert wide to long
  dplyr::rename(Time = Month.Year) %>% #rename time variable
  separate(Var, c("EPU","Var"),"\\.") %>% #separate out EPU from variable names
    mutate(Time = dmy(.$Time), #Convert to date format
         Units = plyr::mapvalues(Var, from = unique(Var), to = c(rep("J/kg",2),"m^2/sec^2","J/kg")), #add units
         Time, season = plyr::mapvalues(month(Time), from = seq(1,12,1), #Get season
                                         to = c(rep("winter",3),
                                                rep("spring",3),
                                                rep("summer",3),
                                                rep("fall",3)))) 


# Calculate total wind speed from u and v components
total_wind_speed <- wind_clean1 %>% 
  filter(Var == "uwnd"| Var == "vwnd") %>% #select variables
  spread(., Var, Value) %>% #convert to wide for calculating tws
  mutate(`total wind speed` = sqrt(uwnd^2 + vwnd^2)) %>%  #tws
  dplyr::select(-uwnd, -vwnd) %>% #start processing back to SOE format
  gather(.,Var, Value, `total wind speed`) #convert to long

wind_clean <- rbind(wind_clean1, total_wind_speed)
wind_clean <- wind_clean %>%
  unite(., Var, c(Var, season), sep = " ") %>% #merge season into Var column
  group_by(Time = year(Time), EPU, Var, Units) %>% 
  dplyr::summarise(Value = mean(Value)) %>% 
  as.data.frame()

if (save_clean){
save(wind_clean, file =
       file.path(clean.dir, "wind_clean.Rdata"))
}

MAB Total Wind Speed

GB Total Wind Speed

GOM Total Wind Speed

MAB Helicity

GB Helicity

GOM Helicity

MAB TKE

GB TKE

GOM TKE

Slopewater proportions

Slopewater proportions give the percent total of water type observed in the deep Northeast Channel (150-200 m depth).

Variables in “slopewater_proportions.csv”
Variable Names Units
Warm Slope Water proportion WSW unitless
Labrador Shelf Slope Water proportion LSLW unitless

Raw data fields correspond to year, water mass flavor (WSW = Warm Slope Water, LSLW = Labrador Slope Water), and proportion of total expressed as a percentage.

Filename: slopewater_proportions.csv
Contributor: Paula Fratantoni ()

Processing

d <- read.csv(file.path(raw.dir,"slopewater_proportions.csv"))

slopewater <- d %>%
  dplyr::rename(Time = year, Var = water.mass.flavor, Value = prop) %>% 
  mutate(EPU = "all", Units = "unitless", Var2 = "proportion ne channel") %>% 
  unite(.,Var,c(Var,Var2), sep = " ")

if (save_clean){
save(slopewater, file =
       file.path(clean.dir, "slopewater_proportions.Rdata"))
}

Visualization

slopewater %>% 
  mutate(Var, Var = plyr::mapvalues(Var, from = c("WSW proportion ne channel",
                                                  "LSLW proportion ne channel"),
                                    to = c("WSW","LSLW"))) %>% 
  dplyr::rename(Flavor  = Var) %>% 
ggplot() +
  geom_line(aes(x = Time, y = Value, color = Flavor))+
  geom_point(aes(x = Time, y = Value, color = Flavor)) +
  ylab("Percent of Total Slopewater") +
  ggtitle("Slopewater Proportions in NE Channel")+
  theme_bw()+
  theme(strip.background = element_blank())

Ocean temperature anomaly

These data include in situ regional time series of both surface and bottom water temperature anomalies on the Northeast Continental Shelf. Raw data is split into four files by EPU (SS, GOM, GB, and MAB).

Variables in “Eco{EPU}_core_Ttopbot.csv”
Variable Names Units
SST anomaly Tsfc_anom °C
Reference SST (1981-2010) Tsfc_ref °C
Bottom temp. anomaly Tbot_anom °C
Reference BT (1981-2010) Tbot_ref °C

Filenames: EcoSS_core_Ttopbot.csv, EcoGoM_core_Ttopbot.csv, EcoGB_core_Ttopbot.csv, EcoMAB_core_Ttopbot.csv
Contributor: Paula Fratantoni ()

Processing

ss <- read.csv(file.path(raw.dir,"EcoSS_core_Ttopbot.csv")) %>% mutate(EPU = "SS")
gom <- read.csv(file.path(raw.dir,"EcoGoM_core_Ttopbot.csv")) %>% mutate(EPU = "GOM")
gb <- read.csv(file.path(raw.dir,"EcoGB_core_Ttopbot.csv")) %>% mutate(EPU = "GB")
mab <- read.csv(file.path(raw.dir,"EcoMAB_core_Ttopbot.csv")) %>% mutate(EPU = "MAB")

ocean_temp_insitu <- rbind(ss, gom, gb, mab) %>% #bind all
  dplyr::rename(Time = decimal.year, Var = variable.name, Value = temperature) %>% #rename
  mutate(Units = "degreesC", Time = as.Date(format(date_decimal(Time), "%Y-%b-%d"), "%Y-%b-%d"),
         Var, Var = plyr::mapvalues(Var, from = c("Tsfc_anom",#Rename variables
                             "Tsfc_ref",
                             "Tbot_anom",
                             "Tbot_ref"),
                             to = c("sst anomaly in situ",
                                "reference sst in situ (1981-2010)",
                                "bottom temp anomaly in situ",
                                "reference bt in situ (1981-2010)"))) %>% 
  group_by(Time = year(Time), EPU, Var, Units) %>%
  dplyr::summarise(Value = mean(Value)) %>%
  as.data.frame() 

if (save_clean){
save(ocean_temp_insitu, file =
       file.path(clean.dir, "ocean_temp_insitu.Rdata"))
}

Visualization

Ocean salinity anomaly

These data include in situ regional time series of both surface and bottom salinity anomalies on the Northeast Continental Shelf. Raw data is split into four files by EPU (SS, GOM, GB, and MAB).

Variables in “Eco{EPU}_core_Stopbot.csv”
Variable Names Units
Surface salinity anomaly Ssfc_anom PSU
Reference surface salinity (1981-2010) Ssfc_ref PSU
Bottom salinity anomaly Sbot_anom PSU
Reference bottom salinity (1981-2010) Sbot_ref PSU

Filenames: EcoSS_core_Stopbot.csv, EcoGoM_core_Stopbot.csv, EcoGB_core_Stopbot.csv, EcoMAB_core_Stopbot.csv
Contributor: Paula Fratantoni ()

Processing

ss <- read.csv(file.path(raw.dir,"EcoSS_core_Stopbot.csv")) %>% mutate(EPU = "SS")
gom <- read.csv(file.path(raw.dir,"EcoGoM_core_Stopbot.csv")) %>% mutate(EPU = "GOM")
gb <- read.csv(file.path(raw.dir,"EcoGB_core_Stopbot.csv")) %>% mutate(EPU = "GB")
mab <- read.csv(file.path(raw.dir,"EcoMAB_core_Stopbot.csv")) %>% mutate(EPU = "MAB")

ocean_sal_insitu <- rbind(ss, gom, gb, mab) %>% #bind all
  dplyr::rename(Time = decimal.year, Var = variable.name, Value = salinity) %>% #rename
  mutate(Units = "PSU", Time = as.Date(format(date_decimal(Time), "%Y-%b-%d"), "%Y-%b-%d"),
         Var, Var = plyr::mapvalues(Var, from = c("Ssfc_anom",
                             "Ssfc_ref",
                             "Sbot_anom",
                             "Sbot_ref"),
                     to = c("surface salinity anomaly in situ",
                        "reference surface salinity in situ (1981-2010)",
                        "bottom salinity anomaly in situ",
                        "reference bottom salinity in situ (1981-2010)"))) %>% 
  group_by(Time = year(Time), EPU, Var, Units) %>%
  dplyr::summarise(Value = mean(Value)) %>%
  as.data.frame()

if (save_clean){
save(ocean_sal_insitu, file =
       file.path(clean.dir, "ocean_sal_insitu.Rdata"))
}

Visualization

Stratification

These data are time series of average stratification (0-50 m depth) by EPU.

Filename: Strat50.csv Contributor: Paula Fratantoni ()

Variables in “Strat50.csv”
Variable Names Units
stratification stratification kg m -3

Processing

strat <- read.csv(file.path(raw.dir, "Strat50.csv"), stringsAsFactors = FALSE)

stratification <- strat %>% 
  dplyr::rename(Time = time, Var = var, Value = stratification) %>% 
  separate(., Var, c("Var","EPU"), sep = "_") %>% 
  mutate(Var = "stratification (0-50 m)",
         Units = "kg m^-3")

if (save_clean){
  save(stratification, file = file.path(clean.dir, "stratification.Rdata"))
}

Visualization

EcoMon Nutrient Data

These data include nutrient data sampled on EcoMon cruises between 11/3/2009 - 10/19/2016.

Filename: EcoMon Nutrient Data Through June 2018.csv
Contributor: Chris Melrose ()

Processing

d <- read.csv(file.path(raw.dir,"EcoMon Nutrient Data Through June 2018.csv"), stringsAsFactors = FALSE)

#Create data frame for mapping units to variable names
mapping <- data.frame(Units = as.character(d[1,]),
                      Var = as.character(names(d)))
mapping[mapping$Units == "" | mapping$Units == "NA",]$Units <- NA

#remove row with units
d <- slice(d,-1)

d1 <- d %>% 
  mutate(Time = Date_UTC) %>% #create Time variable
  dplyr::select(-Date_UTC,-Time_UTC) %>% #remove time, date
  gather(., Var, Value, -Latitude, -Longitude, -Time, -Depth_sampling, -Depth_station) %>% #turn wide to long while retaining lat/lon
  filter(!is.na(Value)) %>% #remove NA
  left_join(., mapping, by = c("Var")) %>% #join units 
  mutate(Longitude = as.numeric(Longitude),
         Latitude = as.numeric(Latitude),
         Time = mdy(Time)) %>% 
  filter(Latitude > 32, Latitude<50)


#Sanity check
# t1 <- d1[d1$Var == "CTDOXYMOL" ,]$Value
# t <-  d %>% slice(.,-1)
# t <- as.character(t$CTDOXYMOL)
# all(t == t1)

#Read in EPU shapefile
epu <- readOGR(file.path(gis.dir, "EPU_Extended.shp"), verbose = F) 
epu <- as(epu, "sf") #convert to sf object

if(spatial_processing){

  #Test maps
  #All EPUs
  #ggplot() + geom_sf(data = epu)
  
  #Scotian shelf
  # ss <- epu %>% filter(EPU == "SS")
  # ggplot() + geom_sf(data = ss)
  
  #get latitude and longitude for creating SpatialPointsDataFrame
  lat <-  as.numeric(d$Latitude)
  lon <- as.numeric(d$Longitude)
  coords <- data.frame(lon = lon,lat = lat)
  
  #create spdf
  spdf <- SpatialPointsDataFrame(coords = coords, data = coords,
                                 proj4string = CRS(crs))
  #convert to sf
  coords_sf <- st_as_sf(spdf) 
  
  #get intersection for mapping EPUs back to nutrient data
  epu_intersect <- st_intersection(epu, coords_sf)
  #plot(epu_intersect[epu_intersect$EPU == "MAB",])
  
  #Map back to nutrient data frame
  epu_df <- data.frame(Longitude = epu_intersect$lon,
                       Latitude = epu_intersect$lat,
                       EPU = epu_intersect$EPU)
  #join
  NE_LME_nutrients <- d1 %>% 
    left_join(.,epu_df, by = c("Latitude","Longitude"))
  
  #Select data for plotting 
  Nitr <- NE_LME_nutrients %>% filter(Var == "NITRIT.NITRAT")
  
  #Back to SOE format and specify bottom, mid-water, or surface sampling
  NE_LME_nutrients <- NE_LME_nutrients %>%
    dplyr::select(-Latitude, -Longitude) %>% 
    mutate(Value = as.numeric(Value),
           Depth_station = as.numeric(Depth_station),
           Depth_sampling = as.numeric(Depth_sampling)) %>% 
    mutate(bot_dif = Depth_station-Depth_sampling) %>% 
    mutate(surf_bot = ifelse(bot_dif <= 10, "bottom",
                           ifelse(bot_dif > 10 & Depth_sampling <= 5, "surface", "mid-water"))) %>% 
    filter(Value > 0, !is.na(EPU), !Var %in% c("BTLNBR","CASTNO","Depth_sampling",
                                             "Depth_station","STNNBR")) %>% 
    mutate(Var = paste(Var, surf_bot)) %>% 
    dplyr::select(Time, Var, Value, Units, EPU) %>% 
    group_by(EPU, Time = year(Time), Var, Units) %>% 
    dplyr::summarise(Value = mean(Value, na.rm = TRUE)) %>% 
    as.data.frame()
  
  if (save_clean){
    save(NE_LME_nutrients,file = file.path(clean.dir, "EcoMon_nutrients.Rdata"))
  }
} else {
  load(file.path(sample.dir,"sample_nutrients.Rdata"))
  load(file.path(clean.dir,"EcoMon_nutrients.Rdata"))
}

QA

Surface Nutrients

Bottom Nutrients

Synthesis

MAB Corr. Matrix

GB Corr. Matrix

GB Corr. Matrix